{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "initial_id",
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5\n",
      "Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans.\n",
      "\n",
      "Last year COVID-19 kept us apart. This year we are finally together again.\n",
      "\n",
      "Tonight, we meet as Democrats Republicans and Independents. But most importantly as Americans.\n",
      "\n",
      "With a duty to one another to the American people to the Constitution.\n",
      "\n",
      "And with an unwavering resolve that freedom will always triumph over tyranny.\n",
      "\n",
      "Six days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated.\n",
      "\n",
      "He thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined.\n",
      "\n",
      "He met the Ukrainian people.\n",
      "\n",
      "From President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv()\n",
    "# Option 2: use an Azure OpenAI account with a deployment of an embedding model\n",
    "\n",
    "# Create embeddings and vector store instances\n",
    "embeddings: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings(\n",
    "    azure_deployment=os.getenv(\"AZURE_DEPLOYMENT_EMBEDDING\"),\n",
    "    openai_api_version=os.getenv(\"AZURE_OPENAI_API_VERSION\"),\n",
    "    azure_endpoint=os.getenv(\"AZURE_OPENAI_ENDPOINT\"),\n",
    "    api_key=os.getenv(\"AZURE_OPENAI_API_KEY\"),\n",
    ")\n",
    "\n",
    "# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization\n",
    "# os.environ['FAISS_NO_AVX2'] = '1'\n",
    "\n",
    "from langchain_community.document_loaders import TextLoader\n",
    "from langchain_community.vectorstores import FAISS\n",
    "from langchain_openai import OpenAIEmbeddings\n",
    "from langchain_text_splitters import CharacterTextSplitter\n",
    "\n",
    "loader = TextLoader(\"../documents/state_of_the_union.txt\")\n",
    "documents = loader.load()\n",
    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
    "docs = text_splitter.split_documents(documents)\n",
    "# embeddings = OpenAIEmbeddings()\n",
    "db = FAISS.from_documents(docs, embeddings)\n",
    "print(db.index.ntotal)\n",
    "\n",
    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
    "docs = db.similarity_search(query)\n",
    "print(docs[0].page_content)\n",
    "\n",
    "\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-06-26T02:10:12.066843Z",
     "start_time": "2024-06-26T02:09:58.975538Z"
    }
   },
   "id": "4a7122b81323e1c4"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   },
   "id": "3f170035ee934c5f"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
